msg_tool\scripts\favorite/
disasm.rs

1use crate::ext::io::*;
2use crate::types::*;
3use crate::utils::encoding::*;
4use anyhow::Result;
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7use std::io::{Read, Seek, SeekFrom};
8
/// Kind of a single operand slot in an instruction's encoding.
///
/// The opcode table lists, per opcode, which of these follow the opcode byte.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Oper {
    /// Byte operand.
    B,
    /// Word operand.
    W,
    /// Double-word operand.
    D,
    /// String operand.
    S,
    /// Float operand.
    F,
}
22
23use Oper::*;
24
25#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
26#[serde(tag = "t", content = "c")]
27pub enum Operand {
28    B(u8),
29    W(u16),
30    D(u32),
31    S(String),
32    F(f32),
33}
34
35impl Operand {
36    pub fn len(&self, encoding: Encoding) -> Result<usize> {
37        Ok(match self {
38            Operand::B(_) => 1,
39            Operand::W(_) => 2,
40            Operand::D(_) => 4,
41            Operand::S(s) => {
42                let bytes = encode_string(encoding, s, true)?;
43                // null terminator + length byte
44                bytes.len() + 2
45            }
46            Operand::F(_) => 4,
47        })
48    }
49}
50
51const OPS: [(u8, &[Oper]); 49] = [
52    (0x00, &[]),     //noop
53    (0x01, &[B, B]), //initstack
54    (0x02, &[D]),    //call
55    (0x03, &[W]),    //syscall
56    (0x04, &[]),     //ret
57    (0x05, &[]),     //ret2
58    (0x06, &[D]),    //jmp
59    (0x07, &[D]),    //jmpcond
60    (0x08, &[]),     //pushtrue
61    (0x09, &[]),     //pushfalse
62    (0x0a, &[D]),    //pushint
63    (0x0b, &[W]),    //pushint
64    (0x0c, &[B]),    //pushint
65    (0x0d, &[F]),    //pushfloat * unused
66    (0x0e, &[S]),    //pushstring
67    (0x0f, &[W]),    //pushglobal
68    (0x10, &[B]),    //pushstack
69    (0x11, &[W]),    //unknown
70    (0x12, &[B]),    //unknown
71    (0x13, &[]),     //pushtop
72    (0x14, &[]),     //pushtmp
73    (0x15, &[W]),    //popglobal
74    (0x16, &[B]),    //copystack
75    (0x17, &[W]),    //unknown
76    (0x18, &[B]),    //unknown
77    (0x19, &[]),     //neg
78    (0x1a, &[]),     //add
79    (0x1b, &[]),     //sub
80    (0x1c, &[]),     //mul
81    (0x1d, &[]),     //div
82    (0x1e, &[]),     //mod
83    (0x1f, &[]),     //test
84    (0x20, &[]),     //logand
85    (0x21, &[]),     //logor
86    (0x22, &[]),     //eq
87    (0x23, &[]),     //neq
88    (0x24, &[]),     //gt
89    (0x25, &[]),     //le
90    (0x26, &[]),     //lt
91    (0x27, &[]),     //ge
92    (0x33, &[]),
93    (0x3f, &[]),
94    (0x40, &[]),
95    (0xb3, &[]),
96    (0xb8, &[]),
97    (0xd8, &[]),
98    (0xf0, &[]),
99    (0x52, &[]),
100    (0x9e, &[]),
101];
102
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct Func {
105    pub pos: u64,
106    pub opcode: u8,
107    pub operands: Vec<Operand>,
108}
109
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct Data {
112    pub functions: Vec<Func>,
113    pub main_script: Vec<Func>,
114    pub extra_data: Vec<u8>,
115    #[serde(skip)]
116    speak_func_indices: HashSet<u32>,
117    #[serde(skip)]
118    func_pos_map: HashMap<u64, usize>,
119    #[serde(skip)]
120    speaker_names: HashMap<usize, Vec<String>>,
121    pub sys_imports: Vec<String>,
122}
123
124impl Data {
125    pub fn disasm<R: Read + Seek>(mut reader: R, encoding: Encoding) -> Result<Self> {
126        let mut data = Data {
127            functions: Vec::new(),
128            main_script: Vec::new(),
129            extra_data: Vec::new(),
130            speak_func_indices: HashSet::new(),
131            func_pos_map: HashMap::new(),
132            speaker_names: HashMap::new(),
133            sys_imports: Vec::new(),
134        };
135        let script_len = reader.read_u32()? as u64;
136        let main_script_data = reader.peek_u32_at(script_len)? as u64;
137        {
138            let mut target = &mut data.functions;
139            let mut pos = reader.stream_position()?;
140            while pos < script_len {
141                if pos >= main_script_data {
142                    target = &mut data.main_script;
143                }
144                target.push(Self::read_func(&mut reader, encoding)?);
145                pos = reader.stream_position()?;
146            }
147        }
148        reader.seek(SeekFrom::Start(script_len + 4))?;
149        reader.read_to_end(&mut data.extra_data)?;
150        let mut off = script_len + 10;
151        let offset = reader.peek_u8_at(off)?;
152        off += 1 + offset as u64;
153        let sysimport_num = reader.peek_u16_at(off)?;
154        off += 2;
155        for _ in 0..sysimport_num {
156            let s = reader.peek_cstring_at(off + 2)?;
157            let s = decode_to_string(encoding, s.as_bytes(), true)?;
158            data.sys_imports.push(s);
159            off += 2 + reader.peek_u8_at(off + 1)? as u64;
160        }
161        data.index_functions();
162        data.find_speak_functions();
163        data.collect_speaker_names();
164
165        Ok(data)
166    }
167
168    fn index_functions(&mut self) {
169        for (idx, func) in self.functions.iter().enumerate() {
170            if func.opcode == 0x01 {
171                self.func_pos_map.insert(func.pos, idx);
172            }
173        }
174    }
175
176    fn find_speak_functions(&mut self) {
177        for (idx, func) in self.functions.iter().enumerate() {
178            if func.opcode == 0x01 {
179                // SPEAK functions have initstack with (3, 0) or (5, 0) parameters
180                if let (Some(Operand::B(arg_count)), Some(Operand::B(0))) =
181                    (func.operands.first(), func.operands.get(1))
182                {
183                    if *arg_count == 3 || *arg_count == 5 {
184                        self.speak_func_indices.insert(idx as u32);
185                    }
186                }
187            }
188        }
189    }
190
191    fn collect_speaker_names(&mut self) {
192        let func_starts: Vec<usize> = self
193            .functions
194            .iter()
195            .enumerate()
196            .filter(|(_, f)| f.opcode == 0x01)
197            .map(|(i, _)| i)
198            .collect();
199
200        for &speak_idx in &self.speak_func_indices {
201            let speak_idx = speak_idx as usize;
202
203            let start_pos = func_starts.iter().position(|&s| s == speak_idx);
204            if let Some(pos) = start_pos {
205                let end = func_starts
206                    .get(pos + 1)
207                    .copied()
208                    .unwrap_or(self.functions.len());
209                let names: Vec<String> = (speak_idx..end)
210                    .filter(|&i| self.functions[i].opcode == 0x0e)
211                    .filter_map(|i| match self.functions[i].operands.first() {
212                        Some(Operand::S(s)) if !s.trim().is_empty() => Some(s.clone()),
213                        _ => None,
214                    })
215                    .collect();
216
217                if !names.is_empty() {
218                    self.speaker_names.insert(speak_idx, names);
219                }
220            }
221        }
222    }
223
224    fn get_speaker(&self, func_idx: usize) -> Option<String> {
225        let names = self.speaker_names.get(&func_idx)?;
226
227        // Prefer names without '?' prefix, take the last one (usually the "known" name)
228        if let Some(name) = names.iter().filter(|n| !n.contains('?')).last() {
229            return Some(name.trim().to_string());
230        }
231
232        // If all names have '?', strip it from the last one
233        names.last().and_then(|name| {
234            let cleaned = name.trim().trim_start_matches('?').trim();
235            if !cleaned.is_empty() {
236                Some(cleaned.to_string())
237            } else {
238                None
239            }
240        })
241    }
242
243    pub fn extract_messages(&self, filter_ascii: bool) -> Vec<(Option<String>, String)> {
244        let mut messages = Vec::new();
245
246        // Extract strings from functions section (no speakers)
247        for func in &self.functions {
248            if func.opcode == 0x0e {
249                if let Some(Operand::S(s)) = func.operands.first() {
250                    if !(filter_ascii && s.chars().all(|c| c.is_ascii())) {
251                        messages.push((None, s.clone()));
252                    }
253                }
254            }
255        }
256
257        // Process main_script, track SPEAK calls for speaker names
258        let mut current_speaker: Option<String> = None;
259
260        for func in &self.main_script {
261            if func.opcode == 0x02 {
262                if let Some(Operand::D(call_target)) = func.operands.first() {
263                    if let Some(&func_idx) = self.func_pos_map.get(&(*call_target as u64)) {
264                        if self.speak_func_indices.contains(&(func_idx as u32)) {
265                            current_speaker = self.get_speaker(func_idx);
266                        }
267                    }
268                }
269            } else if func.opcode == 0x0e {
270                if let Some(Operand::S(s)) = func.operands.first() {
271                    if !(filter_ascii && s.chars().all(|c| c.is_ascii())) {
272                        messages.push((current_speaker.clone(), s.clone()));
273                    }
274                }
275            }
276        }
277
278        messages
279    }
280
281    fn read_func<R: Read + Seek>(reader: &mut R, encoding: Encoding) -> Result<Func> {
282        let pos = reader.stream_position()?;
283        let opcode = reader.read_u8()?;
284        let operands = if let Some((_, ops)) = OPS.iter().find(|(code, _)| *code == opcode) {
285            let mut operands = Vec::with_capacity(ops.len());
286            for &op in *ops {
287                let operand = match op {
288                    B => Operand::B(reader.read_u8()?),
289                    W => Operand::W(reader.read_u16()?),
290                    D => Operand::D(reader.read_u32()?),
291                    S => {
292                        let len = reader.read_u8()? as usize;
293                        let s = reader.read_cstring()?;
294                        if s.as_bytes_with_nul().len() != len {
295                            return Err(anyhow::anyhow!(
296                                "String length mismatch at {:#x}: expected {}, got {}",
297                                pos,
298                                len,
299                                s.as_bytes_with_nul().len()
300                            ));
301                        }
302                        let s = decode_to_string(encoding, s.as_bytes(), true)?;
303                        Operand::S(s)
304                    }
305                    F => Operand::F(reader.read_f32()?),
306                };
307                operands.push(operand);
308            }
309            operands
310        } else {
311            return Err(anyhow::anyhow!(
312                "Unknown opcode: {:#x} at {:#x}",
313                opcode,
314                pos
315            ));
316        };
317        Ok(Func {
318            pos,
319            opcode,
320            operands,
321        })
322    }
323}